/*****************************************************************************
State of Aadhaar Survey 2017-2018

Title: 00_genvar_nonroster_pub.do
Author: IDinsight
Contact: stateofaadhaar@idinsight.org
Date: 29 August 2018
Data: "SOA2018_nonroster_cleaned.dta" -- cleaned, NON roster survey data
Description: 	This .do file generates variables to be used in the analysis
				of the 2018 State of Aadhaar survey data, for the NON roster
				component of the survey (i.e. questions on the respondent or
				their household as a whole, as opposed to each household member).
				It saves output to "SOA2018_nonroster_cleaned_gen.dta"
				as well as "SOA2018_nonroster_cleaned_gen.csv".

Contents:

	0. Creating categories of household and respondent characteristics used in
	regressions
	(The following sections generate variables for analysis for each section of 
	the 2018 State of Aadhaar report.)
	1. Enrolment
	2. Data quality
	3. General usage
	4. Banking
	5. Mobile
	6. PDS
	7. User awareness
	8. NREGA
	
	9. Remaining processing
	
Missing data code:
	.r = refused
	.d = don't know	
*****************************************************************************/

	
* Setting up: reset the Stata session, then open the cleaned non-roster data

	version 14
	capture log close
	clear all
	macro drop _all
	set more off

	* Please replace "..." below with the correct file path on your computer.
	* macOS gets a "/Users/..." path; every other operating system falls through
	* to the Windows-style "C:/Users/..." path.
	if c(os) == "MacOSX" {
		global dir "/Users/`c(username)'/.../SOA2018_data_release/"
	}
	else {
		global dir "C:/Users/`c(username)'/.../SOA2018_data_release/"
	}

	* All data sets live in the "Data_sets" sub-folder of the release directory
	cd "${dir}/Data_sets/"
	use "SOA2018_nonroster_cleaned.dta", clear
	

/*****************************************************************************
0. Creating categories of household and respondent characteristics used in regressions
*****************************************************************************/

	* To generate: sc_cat st_cat rel_muslim majority_female_HH maxedu_noschool resp_noschool resp_female resp_above60
	* Every indicator below is 0/1 and is left missing wherever its source variable is missing.

	* Caste (of household and household member)
		* "category" codes 1-4 map, in order, onto the gen / sc / st / obc indicators

		local code = 0
		foreach grp in gen sc st obc {
			local ++code
			gen `grp'_cat = (category == `code') if category < .
		}

		label variable gen_cat "General category"
		label variable sc_cat "SC category"
		label variable st_cat "ST category"
		label variable obc_cat "OBC Category"

	* Religion (of household and household member)

		gen rel_muslim = (religion == 2) if religion < .
		label variable rel_muslim "Muslim household"

	* Gender: whether household has majority female among adults
		* Values above 100 are left missing; Stata missing values sort above every
		* number, so the "<= 100" filter also drops missing proportions.

		gen majority_female_HH = (prop_female_adults >= 50) if prop_female_adults <= 100
		label variable majority_female_HH "Majority adults are female in HH"

	* Gender of respondent

		gen resp_female = (gender1 == 2) if gender1 < .
		label variable resp_female "Female respondent"

	* Education: whether no member has any schooling
		* Highest education code across all household members (education1, education2, ...)
		* 1 = not literate
		* 2 = no schooling but literate

		egen maximum_edu_HH = rowmax(education? education??)

		gen maxedu_noschool = (maximum_edu_HH <= 2) if maximum_edu_HH < .
		label variable maxedu_noschool "No member of the household is educated"

	* Education: respondent has no schooling

		gen resp_noschool = (education1 <= 2) if education1 < .
		label variable resp_noschool "Respondent has no schooling"

	* Age: respondent is above age 60
		* Only defined for respondents with a recorded age of 18 or more

		gen resp_above60 = (age1 > 60) if age1 < . & age1 >= 18
		label variable resp_above60 "Respondent above age 60"


		
/*****************************************************************************
1. Enrolment
*****************************************************************************/
				
	* Recoding scale to match across all three states
		* In AP we used a 5 point scale but in Rajasthan and West Bengal we had a three point scale
		* The two extreme answers are folded into their neighbours (1 into 2, 5 into 4)
		recode ad_enease (1 = 2) (5 = 4)

	* Generating a dummy variable indicating whether someone has an additional ID at the time of enrolment
		* Start from 0, switch to 1 when any of the listed "ad_enid_*" options was selected,
		* then apply the "don't know" code; the last replace blanks out everyone whose
		* aadhaar_fm1 is not 1 (including missing), so only those rows keep a value.
		gen additionalID = 0 if !missing(aadhaar_fm1)
		replace additionalID = 1 if inlist(1, ad_enid_4, ad_enid_5, ad_enid_6, ad_enid_7, ///
			ad_enid_8, ad_enid_9, ad_enid_11, ad_enid_12, ad_enid_13, ///
			ad_enid_14, ad_enid_15, ad_enid_100, ad_enid_101, ad_enid_102)
		replace additionalID = .d if ad_enid__999 == 1
		replace additionalID = . if aadhaar_fm1 != 1

		label variable additionalID "Additional IDs"
		label values additionalID yesno
		recast int additionalID
		
	* Creating variables for reasons why people opted into Aadhaar
		* Each grouped reason is first set to 1 where any of its component
		* "aadhaar_why_*" options was selected; the loop further down fills in the
		* 0s (for rows with aadhaar_fm1 == 1) and the "don't know" code.
		gen aadhaarwhy_1 = 1 if inlist(1, aadhaar_why_1, aadhaar_why_2)
		gen aadhaarwhy_2 = 1 if inlist(1, aadhaar_why_3, aadhaar_why_4)
		gen aadhaarwhy_3 = 1 if inlist(1, aadhaar_why_5, aadhaar_why_100)
		gen aadhaarwhy_4 = 1 if inlist(1, aadhaar_why_6, aadhaar_why_7, aadhaar_why_9)
		gen aadhaarwhy_5 = 1 if aadhaar_why_8 == 1

		label variable aadhaarwhy_1 "Government/external impetus"
		label variable aadhaarwhy_2 "Access impetus"
		label variable aadhaarwhy_3 "Social network impetus"
		label variable aadhaarwhy_4 "Identity document impetus"
		label variable aadhaarwhy_5 "Other/no impetus"

		local aadhaarwhy aadhaarwhy_1 aadhaarwhy_2 aadhaarwhy_3 aadhaarwhy_4 aadhaarwhy_5
		foreach reason of local aadhaarwhy {
			* not selected, but aadhaar_fm1 == 1 -> 0
			replace `reason' = 0 if `reason' != 1 & aadhaar_fm1 == 1
			* "don't know" on the underlying question overrides everything else
			replace `reason' = .d if aadhaar_why__999 == 1
			label values `reason' yesno
			recast int `reason'
		}
			
	* Creating categories for payment values for getting Aadhaar
		* Three bands of ad_payrs: below 50, 50 to 200 (inclusive), above 200
		gen payscale = 1 if ad_payrs < 50
		replace payscale = 2 if inrange(ad_payrs, 50, 200)
		replace payscale = 3 if ad_payrs > 200 & ad_payrs < .
		* carry the missing codes (.d and .r) over from ad_payrs
		replace payscale = ad_payrs if missing(ad_payrs)

		label define payscale 1 "Less than 50" 2 "50 to 200" 3 "Above 200"
		label values payscale payscale
		label variable payscale "How much did you have to pay to get your Aadhaar card?"
		order payscale, after(ad_payrs)
		
		
/*****************************************************************************
2. Data quality
*****************************************************************************/			

	* Recoding scale to match across all three states
		* In AP we used a 5 point scale but in Rajasthan and West Bengal we had a three point scale
		foreach scale in aadhaar_etfixease aadhaar_updateease {
			recode `scale' (1 = 2) (5 = 4)
		}

	* Generating a variable for having duplicate Aadhaar
		* the variable "ad_duplicatesame" indicates among those who have 2 Aadhaars whether they have the same number
		* we also need to include those who have only 1 Aadhaar (and hence no duplicate)
		* Answered ad_duplicate -> 0 to begin with; missing codes of ad_duplicate are kept
		gen duplicateaadhaar = cond(missing(ad_duplicate), ad_duplicate, 0)
		* two Aadhaars with different numbers -> duplicate
		replace duplicateaadhaar = 1 if ad_duplicatesame == 0
		replace duplicateaadhaar = .d if ad_duplicatesame == .d
		* We did a back check and it turns out that the duplicate in West Bengal isn't actually a duplicate
		replace duplicateaadhaar = 0 if state == 3 & duplicateaadhaar == 1
		label values duplicateaadhaar yesno

	* Generating a variable for having duplicate voter ID
		* the variable "vid_duplicatesame" indicates among those who have 2 voter IDs whether they have the same number
		* we also need to include those who have only 1 voter ID (and hence no duplicate)
		gen duplicatevid = cond(missing(vid_duplicate), vid_duplicate, 0)
		replace duplicatevid = 1 if vid_duplicatesame == 0
		replace duplicatevid = .d if vid_duplicatesame == .d
		label values duplicatevid yesno
		
		
/*****************************************************************************
3. General usage
*****************************************************************************/			
		
	* Generating variables indicating awareness levels for all types of authentication
		* the survey questions only concern those who have not used a particular type of authentication
		* hence we need to include those who have used them also as those who are aware
		* The three mechanisms follow the same recipe, so they are built in one loop:
		*   ad_<mech>aware_all = ad_<mech>aware for rows with aadhaar_fm1 == 1,
		*   overridden to 1 when the matching ad_use_<code> option was selected.

		local mechanisms fprint iris otp
		local usecodes   3 4 5
		local titles     Fingerprint Iris OTP

		forvalues k = 1/3 {
			local mech  : word `k' of `mechanisms'
			local code  : word `k' of `usecodes'
			local title : word `k' of `titles'

			gen ad_`mech'aware_all = ad_`mech'aware if aadhaar_fm1 == 1
			replace ad_`mech'aware_all = 1 if ad_use_`code' == 1
			order ad_`mech'aware_all, after(ad_`mech'aware)
			label variable ad_`mech'aware_all "An individual aware of `title' auth mechanism"
		}

	* Generating a variable indicating individuals who are aware of all authentication mechanisms
		* 1 only when all three indicators equal 1; missing as soon as any of the
		* three is missing (this includes the .d and .r codes)

		gen ad_aware_all = (ad_fprintaware_all == 1 & ad_irisaware_all == 1 & ad_otpaware_all == 1) ///
			if !missing(ad_fprintaware_all, ad_irisaware_all, ad_otpaware_all)
		label values ad_aware_all yesno
		order ad_aware_all, after(ad_otpaware_all)
		label variable ad_aware_all "An individual aware of all 3 auth mechanisms"

	* Labelling generated variables

		label define awareness 1 "Yes" 0 "No"
		label values ad_fprintaware_all ad_irisaware_all ad_otpaware_all awareness
		
		
/*****************************************************************************
4. Banking
*****************************************************************************/			
				
	* Categories for number of bank accounts
		* with a modification for DBT, where we want to know whether they have one or more than one

		* 1 and 2 are kept as they are, anything above 2 becomes 3;
		* missing codes of bank_no are carried over unchanged
		gen banknumcategories = bank_no if inlist(bank_no, 1, 2) | missing(bank_no)
		replace banknumcategories = 3 if bank_no > 2 & bank_no < .

		label define banknumcategories 1 "Only one account" 2 "Two accounts" 3 "More than two accounts"
		label values banknumcategories banknumcategories
		label var banknumcategories "How many bank accounts do you have?"

		* DBT version: one account versus two or more
		gen banknumcategories_dbt = cond(missing(bank_no), bank_no, ///
			cond(bank_no >= 2, 2, ///
			cond(bank_no == 1, 1, .)))

		label define banknumcategoriesdbt 1 "Only one account" 2 "More than one account"
		label values banknumcategories_dbt banknumcategoriesdbt
		label var banknumcategories_dbt "How many bank accounts do you have?"
		
	* Categories for time taken to open bank account
		* combining those with single and multiple accounts
		* Source codes 1-6 are collapsed into four bands (1 / 2-3 / 4-5 / 6).
		* The replaces run in order, so a later band wins if the two source
		* variables disagree.

		gen bankopencategories = 1 if inlist(1, bank_opentime_single, bank_opentime_multiple)
		replace bankopencategories = 2 if inlist(bank_opentime_single, 2, 3) | inlist(bank_opentime_multiple, 2, 3)
		replace bankopencategories = 3 if inlist(bank_opentime_single, 4, 5) | inlist(bank_opentime_multiple, 4, 5)
		replace bankopencategories = 4 if inlist(6, bank_opentime_single, bank_opentime_multiple)
		* missing codes: don't know, then refused, then plain missing when both sources are blank
		replace bankopencategories = .d if bank_opentime_single == .d | bank_opentime_multiple == .d
		replace bankopencategories = .r if bank_opentime_single == .r | bank_opentime_multiple == .r
		replace bankopencategories = .  if bank_opentime_single == .  & bank_opentime_multiple == .

		label define bankopencategories 1 "1 day" 2 "2-6 days" 3 "7-15 days" 4 "More than 15 days"
		label values bankopencategories bankopencategories
		label var bankopencategories "How long did it take to open this bank account (most recent account for those who have more than 1 account)?"

	* Categories for year in which bank account was opened
		* combining those with single and multiple accounts
		* Source codes 1 and 2 are merged into category 2; codes 3-6 are kept as they are
		gen bankyearcategories = 2 if inlist(bank_year_single, 1, 2) | inlist(bank_year_multiple, 1, 2)
		forvalues yr = 3/6 {
			replace bankyearcategories = `yr' if bank_year_single == `yr' | bank_year_multiple == `yr'
		}
		replace bankyearcategories = .d if bank_year_single == .d | bank_year_multiple == .d
		replace bankyearcategories = .r if bank_year_single == .r | bank_year_multiple == .r
		replace bankyearcategories = .  if bank_year_single == .  & bank_year_multiple == .

		* re-uses the value label named after bank_year_single
		label values bankyearcategories bank_year_single
		label var bankyearcategories "When did you open your bank account (most recent account for those who have more than 1 account)?"
		
	* Generating a dummy variable indicating that bank account was opened in/after 2014
		* year categories 2-4 -> 1, categories 5-6 -> 0; missing codes are carried over

		gen bank2014 = inlist(bankyearcategories, 2, 3, 4) if inlist(bankyearcategories, 2, 3, 4, 5, 6)
		replace bank2014 = bankyearcategories if missing(bankyearcategories)

		label define bank2014 1 "Opened in/after 2014" 0 "Opened before 2014"
		label values bank2014 bank2014
		label var bank2014 "Was the bank account opened after 2014?"

	* Generating a dummy variable indicating those with Aadhaar, mobile and bank account
		* 1 when all three are held, 0 when at least one is reported as not held,
		* and missing whenever any of the three answers is "don't know" or "refused"

		gen jamcandidate = 1 if aadhaar_fm1 == 1 & mobile_fm1 == 1 & bank == 1
		replace jamcandidate = 0 if inlist(0, aadhaar_fm1, mobile_fm1, bank)
		foreach component in aadhaar_fm1 mobile_fm1 bank {
			replace jamcandidate = . if `component' == .d | `component' == .r
		}
		label var jamcandidate "Does the respondent have all the three basic components of JAM (Aadhaar + mobile + bank account)?"
		label values jamcandidate yesno

	* Generating a dummy variable indicating usage of micro-ATM (in NREGA and otherwise)
		* nrega_micro:  respondent has used a micro-ATM for NREGA wages in the last 6 months
		*               (asked first in Andhra Pradesh)
		* microgeneral: respondent has used a micro-ATM in the last 3 months
		* Priority: a "yes" on nrega_micro wins; otherwise microgeneral is used; where
		* microgeneral is blank (.) and nrega_micro is 0 the result is 0 (respondents
		* without a bank account who had not used a micro-ATM in the last 6 months).

		gen microatm = cond(nrega_micro == 1, nrega_micro, ///
			cond(nrega_micro == 0 & microgeneral == ., nrega_micro, microgeneral))
		label var microatm "Did you use a micro-ATM in the last 3 months or use a micro-ATM for NREGA wages in the last 6 months?"	
		label values microatm yesno
		
		
	* Generating a dummy variable indicating the different IDs respondents had that are valid for opening bank accounts
		* Both indicators follow the same steps:
		*   1 if any qualifying ID was selected in the ad_enid question,
		*   0 if none was but the question has an answer (ad_enid is not empty),
		*   .d if the answer to ad_enid was "Don't know",
		*   missing if the respondent does not have an Aadhaar or aadhaar_fm1 is missing.

		* IDs that serve as a legitimate proof of identity:
		* NREGA (1), Voter ID (3), Driving License (5), PAN card (6),
		* Letter from gov authority (9), Passport (11)
		gen bankidpoi = 1 if inlist(1, ad_enid_1, ad_enid_3, ad_enid_5, ad_enid_6, ad_enid_9, ad_enid_11)
		replace bankidpoi = 0 if missing(bankidpoi) & ad_enid != ""
		replace bankidpoi = .d if ad_enid__999 == 1
		replace bankidpoi = . if aadhaar_fm1 == 0 | missing(aadhaar_fm1)
		label values bankidpoi yesno
		label var bankidpoi "Did the respondent possess a legitimate proof of identity for bank openings at the time of Aadhaar enrolment?"

		* IDs that serve as legitimate proof of address:
		* Ration card (2), Bank statement (8), Letter from gov authority (9)
		gen bankidpoa = 1 if inlist(1, ad_enid_2, ad_enid_8, ad_enid_9)
		replace bankidpoa = 0 if missing(bankidpoa) & ad_enid != ""
		replace bankidpoa = .d if ad_enid__999 == 1
		replace bankidpoa = . if aadhaar_fm1 == 0 | missing(aadhaar_fm1)
		label values bankidpoa yesno
		label var bankidpoa "Did the respondent possess a legitimate proof of address for bank openings at the time of Aadhaar enrolment?"
		
	* Generating a dummy variable indicating whether the bank account is seeded
		* combining those with single and multiple accounts
		* For multiple-account holders, any positive bank_aadhaar_multiple counts as seeded.

		gen bankseeded = 1 if bank_aadhaar_single == 1 | (bank_aadhaar_multiple > 0 & bank_aadhaar_multiple < .)
		replace bankseeded = 0 if inlist(0, bank_aadhaar_single, bank_aadhaar_multiple)
		* including a value for don't know since we want to know how many people are unaware whether their bank accounts are seeded with Aadhaar
		replace bankseeded = -999 if bank_aadhaar_single == .d | bank_aadhaar_multiple == .d
		replace bankseeded = .r if bank_aadhaar_single == .r | bank_aadhaar_multiple == .r
		label values bankseeded yesnodk
		label var bankseeded "Is your bank account seeded with Aadhaar?"

	* Generating a variable indicating the ease for opening bank account
		* combining those with single and multiple accounts
		* The single-account answer is used unless it is blank (.), in which case
		* the multiple-account answer is taken.

		gen bankopen_ease = cond(bankopen_ease_single == ., bankopen_ease_multiple, bankopen_ease_single)

		* One recode that performs two steps at once:
		*   (a) the 5-point scale (used only in AP) is folded into the 3-point scale
		*       (used in RJ & WB): 1 joins 2, 5 joins 4;
		*   (b) the result is renumbered 1-3 (difficult-neutral-easy) so that
		*       bankopen_ease can be treated as a continuous variable in regressions.
		recode bankopen_ease (1 2 = 3) (3 = 2) (4 5 = 1)

		label define bankopenease_threepoint 3 "Easy" 2 "Neutral" 1 "Difficult"
		label values bankopen_ease bankopenease_threepoint
		label var bankopen_ease "Overall, how easy or difficult was it for you to open your bank account?"
		
	* Generating a variable indicating whether DBT is sent into an Aadhaar seeded bank account
		* combining those with single and multiple accounts
		* Only respondents who receive DBT (dbt == 1) are coded; everyone else stays missing.
		* Codes: 1 = received into an Aadhaar-seeded account
		*        0 = not received into an Aadhaar-seeded account
		*        2 = unable to determine
		* The rules below are applied in order, so a later rule overrides an earlier one.

		* Respondent has only one account: all DBT goes to that account, so its
		* seeding status decides the outcome
		gen dbt_aadhaarseeded = 1 if bank_aadhaar_single == 1 & dbt == 1
		replace dbt_aadhaarseeded = 0 if bank_aadhaar_single == 0 & dbt == 1

		* Respondent has multiple accounts and receives DBT into one of them:
		* the seeding status of that receiving account decides the outcome
		replace dbt_aadhaarseeded = 1 if dbt_oneaccount_seeded == 1 & dbt == 1
		replace dbt_aadhaarseeded = 0 if dbt_oneaccount_seeded == 0 & dbt == 1

		* Respondent has multiple accounts and all of them are seeded: whichever
		* account receives the DBT is Aadhaar-seeded.
		* Both counts must be non-missing: Stata treats two identical missing codes
		* as equal (for example .d == .d is true), so without this guard a respondent
		* who answered "don't know" to both questions would be coded as seeded.
		replace dbt_aadhaarseeded = 1 if bank_no == bank_aadhaar_multiple ///
			& !missing(bank_no, bank_aadhaar_multiple) & dbt == 1

		* Respondent receives DBT into several accounts and we are not sure whether
		* all of these are Aadhaar-seeded: coded as "Unable to determine"
		replace dbt_aadhaarseeded = 2 if dbt_oneaccount == 0 & dbt == 1

		* Every DBT recipient not classified above is also coded 2 ("Unable to
		* determine"): the respondent does not know how many bank accounts they have,
		* is not sure whether DBT arrives in one account, or is not sure whether the
		* receiving account is Aadhaar-seeded
		replace dbt_aadhaarseeded = 2 if dbt == 1 & missing(dbt_aadhaarseeded)

		label define dbtaadhaarseeded 1 "Receive into Aadhaar-seeded bank account" 0 "Do not receive into Aadhaar-seeded bank account" 2 "Unable to determine whether DBT is received in an Aadhaar-seeded account or not"
		label var dbt_aadhaarseeded "Do you receive this DBT into an Aadhaar seeded bank account?"
		label values dbt_aadhaarseeded dbtaadhaarseeded
		
	* Generating a dummy variable indicating whether the respondent used Aadhaar to open their bank account
		* combining those with single and multiple accounts
		* The single-account answer is used unless it is blank (.), in which case
		* the multiple-account answer is taken.

		gen ad_bankopen = cond(ad_bankopen_single == ., ad_bankopen_multiple, ad_bankopen_single)

		* -999 is turned into the "don't know" missing code.
		* Code 2 is an option for when the respondent uses a "Bhamashah" card. It was
		* added since enumerators were getting confused on what to pick when respondents
		* mentioned they were using Bhamashah cards; these are non-Aadhaar, so they are
		* recoded as non-Aadhaar (0).
		recode ad_bankopen (-999 = .d) (2 = 0)

		label var ad_bankopen "Did you use Aadhaar to open your bank account?"
		label values ad_bankopen yesno

	* Generating a dummy variable indicating whether someone with bank accounts has one or multiple accounts
		* 1 = exactly one account, 0 = more than one; missing codes of bank_no are carried over
		gen singlebankaccount = (bank_no == 1) if bank_no == 1 | (bank_no > 1 & bank_no < .)
		replace singlebankaccount = bank_no if missing(bank_no)
		label var singlebankaccount "Have only one bank account"
		label values singlebankaccount yesnodk
				
		* Expanding the banking variables into one indicator per answer category.
		* "tab x, gen(stub)" creates stub1, stub2, ... with one 0/1 indicator per
		* distinct non-missing value of x, numbered in ascending order of the value.
		* Each loop below then copies the missing code of the source variable into
		* the indicators, which are otherwise left as plain missing by tab.
		* NOTE(review): the hand-written variable labels and the fixed loop ranges
		* assume a specific set of values in each source variable - confirm against
		* the data (for example with "tab x, missing") before relying on them.

		label define yesnodonotknow -999 "Do not know" 0 "No" 1 "Yes"
		replace pmjdy=-999 if pmjdy==.d /* Note: Bringing back the "Don't know code for this since for this question,
			it is relevant to know how many people don't know whether their bank accounts are PMJDY accounts or not. This
			question is only asked to people who have a bank account. */
		* assumes pmjdy now takes exactly the values -999, 0 and 1 (in that order for pmjdy1-pmjdy3)
		tab pmjdy, gen(pmjdy)
		label var pmjdy1 "Do not know whether PMJDY or not"
		label var pmjdy2 "Do not have PMJDY"
		label var pmjdy3 "Have PMJDY"
		forvalues i=1/3{
			replace pmjdy`i'=pmjdy if missing(pmjdy`i')
			label values pmjdy`i' yesno
			}
		label values pmjdy yesnodonotknow
		
		* bankseeded was coded earlier in this file as -999 / 0 / 1 (plus .r)
		tab bankseeded, gen(bankseeded)
		label var bankseeded1 "Don't know/Don't remember"
		label var bankseeded2 "Bank account not seeded with Aadhaar"
		label var bankseeded3 "Bank account seeded with Aadhaar"
		forvalues i=1/3{
			replace bankseeded`i'=bankseeded if missing(bankseeded`i')
			label values bankseeded`i' yesno
			}
		* replaces the value label attached to bankseeded when it was created
		label values bankseeded yesnodonotknow
		
		* bankopencategories was coded earlier in this file as 1-4 (plus .d / .r)
		tab bankopencategories, gen(bankopencategories)
		label var bankopencategories1 "1 day"
		label var bankopencategories2 "2-6 days"
		label var bankopencategories3 "7-15 days"
		label var bankopencategories4 "More than 15 days"
		forvalues i=1/4{
			replace bankopencategories`i'=bankopencategories if missing(bankopencategories`i')
			label values bankopencategories`i' yesno
			}
		
		* bankopen_ease was recoded earlier in this file to 1 = Difficult, 2 = Neutral, 3 = Easy
		tab bankopen_ease, gen(bankopen_ease)
		label var bankopen_ease1 "Difficult"
		label var bankopen_ease2 "Neutral"
		label var bankopen_ease3 "Easy"
		forvalues i=1/3{
			replace bankopen_ease`i'=bankopen_ease if missing(bankopen_ease`i')
			label values bankopen_ease`i' yesno
		}
		
		* Adding a "did not use Aadhaar" category (0) to ad_bankopenhow for respondents
		* whose ad_bankopen is 0
		replace ad_bankopenhow=0 if ad_bankopen==0
		replace ad_bankopenhow=ad_bankopen if missing(ad_bankopen) /* Note: carrying over the ".d" ".r" ".e" from ad_bankopen */
		label define ad_bankopenhow 0 "Did not use Aadhaar", modify
		
		* NOTE(review): after the step above the lowest value of ad_bankopenhow is
		* presumably 0 ("Did not use Aadhaar"), which would make ad_bankopenhow1 the
		* "Did not use Aadhaar" indicator - as in the parallel ad_mobilesimhow block in
		* the Mobile section - rather than "Don't know/Don't remember". If the variable
		* also holds a -999 code there would be four indicators and the labels and the
		* 1/3 loop would both be off. Confirm against the data.
		tab ad_bankopenhow, gen(ad_bankopenhow)
		label var ad_bankopenhow1 "Don't know/Don't remember"
		label var ad_bankopenhow2 "Used Aadhaar as ID"
		label var ad_bankopenhow3 "Used Aadhaar e-KYC"
		forvalues i=1/3{
			replace ad_bankopenhow`i'=ad_bankopenhow if missing(ad_bankopenhow`i')
			label values ad_bankopenhow`i' yesno
		}
		
		label var dbt "Do you receive any direct transfers from government schemes?"		
				
		
/*****************************************************************************
5. Mobile
*****************************************************************************/						

		* Creating categories for time taken to activate mobile SIM
		* Source codes 1-6 are collapsed into four bands (1 / 2-3 / 4-5 / 6)
		gen simactivatecategories = ad_mobilesimtime
		recode simactivatecategories (3 = 2) (4 5 = 3) (6 = 4)

		label define simactivatecategories 1 "1 day" 2 "2-6 days" 3 "7-15 days" 4 "More than 15 days"
		label values simactivatecategories simactivatecategories
		label var simactivatecategories "How long did it take to activate your SIM card?"

		* The following categories are used in regression analysis
		* (one indicator per band; missing codes are copied over from the source variable)
		tab simactivatecategories, gen(simactivatecategories)
		label var simactivatecategories1 "1 day"
		label var simactivatecategories2 "2-6 days"
		label var simactivatecategories3 "7-15 days"
		label var simactivatecategories4 "More than 15 days"
		forvalues band = 1/4 {
			replace simactivatecategories`band' = simactivatecategories if missing(simactivatecategories`band')
			label values simactivatecategories`band' yesno
		}

		* Generating a dummy variable indicating those who did not use Aadhaar in obtaining mobile SIM
		* A category 0 is added to ad_mobilesimhow for respondents whose ad_mobilesim is 0;
		* where both variables are missing, the missing code of ad_mobilesim is used
		replace ad_mobilesimhow = 0 if ad_mobilesim == 0
		replace ad_mobilesimhow = ad_mobilesim if missing(ad_mobilesimhow) & missing(ad_mobilesim)
		label define admobilesimhow 0 "Did not use Aadhaar", modify	

		tab ad_mobilesimhow, gen(ad_mobilesimhow)
		label var ad_mobilesimhow1 "Did not use Aadhaar"
		label var ad_mobilesimhow2 "Used Aadhaar as ID"
		label var ad_mobilesimhow3 "Used Aadhaar e-KYC"
		forvalues how = 1/3 {
			replace ad_mobilesimhow`how' = ad_mobilesimhow if missing(ad_mobilesimhow`how')
			label values ad_mobilesimhow`how' yesno
		}

		* Generating a dummy variable indicating those who have used OTP or are aware of it
		* 1 if either holds, 0 only if both are 0, missing if either answer is
		* "don't know" or "refused"
		gen otpuseaware = 1 if inlist(1, ad_use_5, ad_otpaware)
		replace otpuseaware = 0 if ad_use_5 == 0 & ad_otpaware == 0
		foreach answer in ad_use_5 ad_otpaware {
			replace otpuseaware = . if `answer' == .d | `answer' == .r
		}
		label values otpuseaware yesno
		label var otpuseaware "Have you used or are aware of the mobile OTP feature for Aadhaar?"

		* Other cleaning
		* Bringing back the "don't know" for mobile_aadhaar since it is relevant to know
		* how many people do not know whether their phones are seeded with Aadhaar or not
		replace mobile_aadhaar = -999 if mobile_aadhaar == .d
		tab mobile_aadhaar, gen(mobile_aadhaar)
		label var mobile_aadhaar1 "Don't know / don't remember"
		label var mobile_aadhaar2 "No"
		label var mobile_aadhaar3 "Yes"
		forvalues ans = 1/3 {
			replace mobile_aadhaar`ans' = mobile_aadhaar if missing(mobile_aadhaar`ans')
			label values mobile_aadhaar`ans' yesno
		}
		label values mobile_aadhaar yesnodonotknow

		label define preekyc 0 "In/after Sep 2016" 1 "Before Sep 2016"
		label values pre_ekyc preekyc		
		label var pre_ekyc "Got SIM card before/after Sep 2016"
	
	
/*****************************************************************************
6. PDS
*****************************************************************************/		
		
	* Creating categories for number of ration cards
		* 1 = household has no ration card, 2 = one card, 3 = two cards, 4 = more than two;
		* "don't know" on rc_num is carried over as .d
		gen rc_num_C = 1 if rationcard_hh == 0
		replace rc_num_C = 2 if rc_num == 1
		replace rc_num_C = 3 if rc_num == 2
		replace rc_num_C = 4 if rc_num > 2 & rc_num < .
		replace rc_num_C = .d if rc_num == .d

		label define rc_num_C  1 "Zero" 2 "One" 3 "Two" 4 " Greater than 2"
		label values rc_num_C rc_num_C
		label variable rc_num_C "Number of ration cards that a HH has"

	* Recoding scale to match across all three states
		* In AP we used a 5 point scale but in Rajasthan and West Bengal we had a three point scale
		recode systemcompare (1 = 2) (6 = 5) (4 = .d)
		recode ad_overallease (1 = 2) (5 = 4)

	* Recoding categories for number of attempts for regression analysis
		* rcnumofvisits_reg keeps the original counts; rcnumofvisits itself is top-coded at 5
		gen rcnumofvisits_reg = rcnumofvisits
		replace rcnumofvisits = 5 if rcnumofvisits > 4 & rcnumofvisits < .
		label define rcnumofvisits 5 ">4"
		label values rcnumofvisits rcnumofvisits

		* Same idea for rationlast3months: anything above three is pooled into code 6
		gen rationlast3months_prop = rationlast3months
		replace rationlast3months_prop = 6 if rationlast3months_prop > 3 & rationlast3months_prop < .
		label define rationlast3months_prop 1 "One" 2"Two" 3"Three" 6"More than three"
		label values rationlast3months_prop rationlast3months_prop
		label variable rationlast3months_prop "In the last three months, on average, how many times has it taken you (or) another member of the household for successful fingerprint authentication?"
		
	* Generating variables on Aadhaar-related exclusion		
		* The statements in this step are order-dependent: each replace can override
		* the result of the ones before it.
		
		* Step 1: Exclusion from PDS (in the past 3 months)
		
			* Exclusion in AP and RJ
			* Starts from ration_unable for state < 3, then is forced to 1 wherever one of
			* the listed lessthanthree_* reasons was selected
			gen exclusion_AP_RJ = .
			replace exclusion_AP_RJ = ration_unable if state < 3
				* ration_unable: whether the household has ever tried to collect ration and were not able to in the past 3 months
			replace exclusion_AP_RJ = 1 if lessthanthree_4 == 1 | lessthanthree_5 == 1 | lessthanthree_21  == 1 | lessthanthree_3  == 1 | lessthanthree_6  == 1 
				* among those who went less than 3 times to collect in the past 3 months, reason why they went less than 3 times
				* these are reasons that indicate exclusion (rather than ineligibility or not wanting to collect)
				* (rations are collected ONCE a month in AP and RJ)
			
			* Exclusion in WB
			* Same construction for state == 3, using the lessthansix_* reasons
			gen exclusion_WB = .
			replace exclusion_WB = ration_unable if state == 3
			replace exclusion_WB = 1 if lessthansix_3 == 1 | lessthansix_21 == 1 
				* among those who went less than 6 times to collect in the past 3 months, reason why they went less than 6 times
				* these are reasons that indicate exclusion (rather than ineligibility or not wanting to collect)
				* (rations are collected TWICE a month in WB)
			
			* Combining all 3 states
			gen exclusion = .
			replace exclusion = exclusion_AP_RJ if state < 3
			replace exclusion = exclusion_WB if state == 3
			* households with a recorded rcnumofvisits that are not already 1 and do not
			* carry a .d / .r / .e code are set to 0
			replace exclusion = 0 if exclusion != 1 & !missing(rcnumofvisits) & exclusion != .d & exclusion !=.r & exclusion !=.e
			* reasons 11 and 13 force a 0, overriding any value set above
			* NOTE(review): presumably these reasons indicate non-exclusion - confirm against the questionnaire
			replace exclusion = 0 if lessthanthree_11 == 1 | lessthanthree_13 == 1
			* NOTE(review): .e is not listed in the file header's missing-data codes - confirm its meaning
			replace exclusion = .e if state == 1 & rcnumofvisits == 0
			replace exclusion = .e if lessthanthree_2 == .e
			* the next two lines together reset exclusion to plain missing wherever
			* rcnumofvisits is missing: the first catches non-missing values, the second
			* catches the extended missing codes (.d, .r, .e)
			replace exclusion = . if !missing(exclusion) & missing(rcnumofvisits) 
			replace exclusion = . if missing(rcnumofvisits) & exclusion != .
			label values exclusion yesno
		
			* Fixing "ration_unable_why_`i'" (reasons for being unable to collect ration) based on reasons why
				* they collected ration less than 3 or 6 times in the past 3 months
			* each lessthanthree_* / lessthansix_* reason switches on the matching
			* ration_unable_why_* option, so Step 2 can work from ration_unable_why_* alone
			replace ration_unable_why_1 = 1 if lessthanthree_3 == 1 
			replace ration_unable_why_1 = 1 if lessthansix_3 == 1
			
			replace ration_unable_why_2 = 1 if lessthanthree_6 == 1 | lessthanthree_5 == 1
			
			replace ration_unable_why_3 = 1 if lessthanthree_4 == 1 
			
			replace ration_unable_why_9 = 1 if lessthanthree_21 == 1
			replace ration_unable_why_9 = 1 if lessthansix_21 == 1 
		
		* Step 2: Categories of reasons for exclusion
			* Each bucket is first set to 1 where any of its "ration_unable_why_*" options
			* was selected; the loop further down turns the remaining excluded
			* households (exclusion == 1) into 0s. Everyone else stays missing.

			* 1) Ration being unavailable
			gen exclusion_bucket_1 = 1 if inlist(1, ration_unable_why_9, ration_unable_why_26, ration_unable_why_28, ration_unable_why_101)

			* 2) Reasons related to Aadhaar seeding
			gen exclusion_bucket_2 = 1 if inlist(1, ration_unable_why_1, ration_unable_why_2)

			* 3) Aadhaar authentication failures
			gen exclusion_bucket_3 = 1 if inlist(1, ration_unable_why_5, ration_unable_why_6, ration_unable_why_7)

			* 4) Reasons related to connectivity issues
			gen exclusion_bucket_4 = 1 if ration_unable_why_4 == 1

			* 5) Reasons related to unavailability of PoS-able members
			gen exclusion_bucket_5 = 1 if ration_unable_why_3 == 1

			* 6) Other
			gen exclusion_bucket_6 = 1 if inlist(1, ration_unable_why_21, ration_unable_why_22, ration_unable_why_27, ration_unable_why_29, ///
				ration_unable_why_100, ration_unable_why_102, ration_unable_why_10, ration_unable_why_226)

			* 7) Don't know
			gen exclusion_bucket_99 = 1 if inlist(1, ration_unable_why__999, ration_unable_why_err)

			* Converting to dummies
			foreach b of numlist 1/6 99 {
				replace exclusion_bucket_`b' = 0 if exclusion == 1 & exclusion_bucket_`b' != 1
			}

			* Labelling
			label values exclusion_bucket_* yesno
			label variable exclusion "Exclusion in PDS over 3 months"
			label variable exclusion_bucket_1 "No ration available"
			label variable exclusion_bucket_2 "Aadhaar seeding"
			label variable exclusion_bucket_3 "Aadhaar authentication failures"
			label variable exclusion_bucket_4 "Connectivity (/electricity) issues"
			label variable exclusion_bucket_5 "Not PoS-able member available"
			label variable exclusion_bucket_6 "Other"
			label variable exclusion_bucket_99 "Don't know/Refused to answer"
			
		* Step 3: Classifying exclusion as Aadhaar-related, non-Aadhaar-related, or both
			
			* The three flags start at 0 wherever "exclusion" is non-missing
			foreach flag in aadhaar_relatede_exclusionnew nonaadhaar_relatede_exclusionnew bothadandnonad {
				gen `flag' = 0 if !missing(exclusion)
				}
			
			* Aadhaar-related exclusion: buckets 2 to 5
			replace aadhaar_relatede_exclusionnew = 1 if inlist(1, exclusion_bucket_2, exclusion_bucket_3, exclusion_bucket_4, exclusion_bucket_5)
			
			* Non-Aadhaar related exclusion: buckets 1 and 6
			replace nonaadhaar_relatede_exclusionnew = 1 if inlist(1, exclusion_bucket_1, exclusion_bucket_6)
			
			* Exclusion due to both kinds of reasons; a "don't know" bucket overrides this with -999
			replace bothadandnonad = 1 if aadhaar_relatede_exclusionnew == 1 & nonaadhaar_relatede_exclusionnew == 1
			replace bothadandnonad = -999 if exclusion_bucket_99 == 1
			
			* The single-cause flags are switched off when the household is in "both" or "don't know",
				* so that the categories do not overlap
			foreach flag in aadhaar_relatede_exclusionnew nonaadhaar_relatede_exclusionnew {
				replace `flag' = 0 if inlist(bothadandnonad, 1, -999)
				}
			
			* One categorical variable combining the three categories (plus "don't know")
			gen exclusionadnonadboth = 1 if aadhaar_relatede_exclusionnew == 1
			replace exclusionadnonadboth = 2 if nonaadhaar_relatede_exclusionnew == 1
			replace exclusionadnonadboth = 3 if bothadandnonad == 1
			replace exclusionadnonadboth = -999 if bothadandnonad == -999
			label define exclusionadnonadboth 1 "Aadhaar related factors" 2 "Non Aadhaar related factors" 3 "Both" -999 "Do not know"
			label values exclusionadnonadboth exclusionadnonadboth
			label variable exclusionadnonadboth "Reasons for exclusion"
			
			* Replacing exclusion to missing (.e) when reason 23 was given and none of the exclusion buckets applies
				* The bucket tests are joined with "&" (no bucket equals 1). Joined with "|" the test is true
				* unless every bucket equals 1, so reason 23 alone would blank out exclusion even for
				* households that also reported a genuine reason for exclusion
			replace exclusion = .e if ration_unable_why_23 == 1 & exclusion_bucket_1 != 1 & exclusion_bucket_2 != 1 & exclusion_bucket_3 != 1 & exclusion_bucket_4 != 1 & exclusion_bucket_5 != 1 & exclusion_bucket_6 != 1 & exclusion_bucket_99 != 1
			
			* Reason 24, and reason 25 when reason 9 was not also given, recode the household as not excluded
			replace exclusion = 0 if ration_unable_why_24 == 1 
			replace exclusion = 0 if ration_unable_why_25 == 1 & ration_unable_why_9 != 1
			
			* Keeping the combined reason variable in line with the updated exclusion variable:
				* .e for excluded households without a classified reason, system missing for everyone else
			replace exclusionadnonadboth = .e if exclusion == 1 & exclusionadnonadboth ==.
			replace exclusionadnonadboth = . if exclusion != 1 & !missing(exclusionadnonadboth)
					
			* Same treatment for every bucket dummy
			foreach var of varlist exclusion_bucket_* {
				replace `var' = .e if exclusion == 1 & `var' ==.
				replace `var' = . if exclusion != 1 & !missing(`var')
				}

		
		* Step 4: Average monthly exclusion rate
			* i.e. the number of times a household was unable to collect ration, divided by 3
			* (state < 3) or by 6 (state == 3, West Bengal)
			* NOTE: the statements below modify ration_unable_times in place, so their order matters
				
			* Assumption 1: a reported count above 3 is replaced by 3 (here: the value 5 in state 2)
			replace ration_unable_times = 3 if state == 2 & ration_unable_times == 5
			* Households that were not excluded get a count of 0, unless the count is .d, .r or .e
			replace ration_unable_times = 0 if exclusion == 0 & !(ration_unable_times == .d | ration_unable_times == .r | ration_unable_times == .e)
			
			* Assumption 2: for excluded households with a (system) missing count,
				* the count is taken to be 3 minus the number of visits
			replace ration_unable_times = (3 - rcnumofvisits) if state < 3 & exclusion == 1 & ration_unable_times == .
			* Same capping as under Assumption 1 (here: the value 6 in state 1)
			replace ration_unable_times = 3 if state == 1 & ration_unable_times == 6
			gen exclusion_times_weighted = (ration_unable_times/3) if state < 3
			
			* For West Bengal we do the above with 6 instead of 3
				* (the cap at 6 itself is not restricted to any state)
			replace ration_unable_times = 6 if ration_unable_times > 6 & ration_unable_times < .
			replace ration_unable_times = (6 - rcnumofvisits) if state == 3 & exclusion == 1 & ration_unable_times == .
			replace exclusion_times_weighted = (ration_unable_times/6) if state == 3
			
			* Missing values: extended missing codes are carried over, first from the count
				* and then from the exclusion variable
			foreach src in ration_unable_times exclusion {
				foreach code in .d .e .r {
					replace exclusion_times_weighted = `code' if `src' == `code'
					}
				}
			
			label variable exclusion_times_weighted "Monthly exclusion rate"
		
			* Converting .e to .
			foreach v in exclusion_times_weighted ration_unable_times exclusion exclusionadnonadboth {
				replace `v' = . if `v' == .e
				}
			
/*****************************************************************************
7. User awareness
*****************************************************************************/						
		
		* Harmonising the answer scales across all three states
			* In AP we used a 5 point scale but in Rajasthan and West Bengal we had a three point scale,
			* so the end points 1 and 5 are folded into 2 and 4
			* NOTE(review): the label "ease" is defined here but not attached to any variable in this section - confirm where it is meant to be used
		label define ease 2 "Easier" 3 "Neither easier nor more difficult/No Change" 4 "More difficult"
		label define approval 2 "Approve" 3 "Neutral" 4 "Disapprove"
		label define importance 2 "Important" 3 "Neutral" 4 "Not important"
		
		* Data-sharing questions: recode, create one dummy per remaining category, label as importance
		local sharevars govshare privshare govbioshare privbioshare govadshare privadshare 
		foreach q of local sharevars {
			recode `q' (1=2) (5=4)
			tab (`q'), gen(`q')
			label values `q' importance
			}
		
		* Compulsion questions: same steps, labelled as approval
		local compelvars govcompulsion privcompulsion
		foreach q of local compelvars {
			recode `q' (1=2) (5=4)
			tab (`q'), gen(`q')
			label values `q' approval
			}

			
/*****************************************************************************
8. NREGA
*****************************************************************************/
		
	* Categorising the number of job cards a household has
		* 1 = none (nrega == 0), 2 = exactly one, 3 = more than one;
		* don't know / refused answers are carried over from jobcard_num and then from nrega
		gen jobcard_num_c = 1 if nrega == 0
		replace jobcard_num_c = 2 if jobcard_num == 1
		replace jobcard_num_c = 3 if jobcard_num > 1 & jobcard_num < .
		replace jobcard_num_c = jobcard_num if jobcard_num == .d | jobcard_num == .r 
		replace jobcard_num_c = nrega if nrega == .d | nrega == .r		
		label variable jobcard_num_c "Number of jobcards"
		label define jobcard_num_ccc  1 "0" 2 "1" 3 "Greater than 1"
		label values jobcard_num_c jobcard_num_ccc
		
	* Harmonising the ease scales across all three states
		* In AP we used a 5 point scale but in Rajasthan and West Bengal we had a three point scale,
		* so the end points 1 and 5 are folded into 2 and 4
		recode nrega_ease ad_overallease (1=2) (5=4)
		
	* Generating a dummy variable indicating respondents who receive their NREGA wages directly in their Aadhaar seeded account
		* In the NREGA section this question is asked only to people who have multiple accounts,
		* because it has already been asked in the banking section.
		* Hence here we also need to incorporate those with a single bank account 
		* who receive their NREGA wages directly in their Aadhaar seeded account
		
		* Start from the answer given in the NREGA section
		gen adseededbank_nrega = nrega_dbt_seeded
		
		* Single-account respondents: take the answer from bank_aadhaar_single, including don't know / refused
		replace adseededbank_nrega = 1 if nrega_dbt == 1 & bank_aadhaar_single == 1
		replace adseededbank_nrega =.d if nrega_dbt == 1 & bank_aadhaar_single == .d
		replace adseededbank_nrega =.r if nrega_dbt == 1 & bank_aadhaar_single == .r
		
		* Any other non-missing answer becomes 0
			* !missing() is used instead of "!= ." because .d and .r are also different from "." and would
			* otherwise be overwritten with 0, undoing the two lines above
		replace adseededbank_nrega = 0 if adseededbank_nrega != 1 & nrega_dbt == 1 & !missing(adseededbank_nrega)
		
	* Generating a dummy variable indicating respondents who were unable to withdraw their NREGA wages from a microATM
		* The variable nrega_microparesponse = 4 refers to those residents who were unable to withdraw their wages in cash at all. 
		* The denominator for this variable is those respondents who used a microATM to withdraw their wages in Andhra Pradesh		
		
		* Among those reporting a same-day problem: 1 if the response code is 4, otherwise 0
		gen exclusion_microATM = (nrega_microparesponse == 4) if nrega_microsamedayprob == 1
		* No same-day problem: 0
		replace exclusion_microATM = 0 if nrega_microsamedayprob == 0
		* Don't know / refused are carried over unchanged
		replace exclusion_microATM = nrega_microsamedayprob if 	nrega_microsamedayprob == .d | nrega_microsamedayprob == .r
		
		label define exclusion 1 "Yes" 0 "No"
		label values exclusion_microATM exclusion
		
		
/*****************************************************************************
9. Remaining processing
*****************************************************************************/		
		
	* Converting district_id to a labelled numeric variable
		
		* Remove every occurrence of "33_" from the string ID, then convert it to numeric
		replace district_id = subinstr(district_id, "33_", "", .)		
		destring district_id, replace
		
		* District names, defined one code range at a time
		label define districts 1 "Bankura" 2 "Hooghly" 3 "Jalpaiguri" 4 "Murshidabad" ///
			5 "Purba Bardhaman" 6 "Purbo Medinipur" 7 "South 24 Parganas"
		label define districts 11 "Srikakulam" 12 "West Godavari" 13 "Krishna" ///
			14 "Nellore" 15 "Kadapa" 16 "Anantapur", add
		label define districts 21 "Alwar" 22 "Sawai Madhopur" 23 "Tonk" 24 "Jaipur" ///
			25 "Nagaur" 26 "Jodhpur" 27 "Udaipur" 28 "Ganganagar", add
		label values district_id districts 

		* These variables are dropped from the output data set
		drop district ac ac_id 


	* Labelling some remaining variables
		
		* "Don't know" options
		label variable ad_bankopenhow__999 "Don't Know"
		label variable whynowork__999 "Don't know"
		label variable microproblems__999 "Don't know"
		
		* Other variable labels
		label variable ad_overalldifficult_2 "I am made to link my Aadhaar card to many things"
		label variable ration_unable_why_226 "I was unaware that ration was being distributed"
		label variable maximum_edu_HH "Maximum education level attained in the household"
		
		* Value labels for noadrectifywhy
		label define norectify ///
			1 "I did not know I could fix the error" ///
			2 "I did not want to give the card back" ///
			3 "The error on the Aadhaar card does not affect me in any way" ///
			4 "I did not want to have to wait for a new card to come" ///
			5 "I didn’t have the time" ///
			6 "There are errors in my other documents" ///
			7 "I do not know how to fix the error" ///
			8 "I heard from others that it costs too much money" ///
			9 "The enrolment centre is far" ///
			10 "I did not try due to health concerns/disability" ///
			11 "Supporting documents were not available" ///
			100 "It is not convenient for me to go fix the error"
		label values noadrectifywhy norectify

	
*** Saving
	* Both files are written with relative paths, i.e. into the current working directory
	* (set with "cd" during setup; assumes it has not been changed since - TODO confirm).
	* Existing files of the same name are overwritten.
		
	* Stata data set
	save "SOA2018_nonroster_cleaned_gen.dta", replace
	* Comma-separated copy of the same data
	outsheet using "SOA2018_nonroster_cleaned_gen.csv", comma replace
